Introduction

This notebook leverages self-reported EPA emissions data from both direct emitters and suppliers to quantify greenhouse gas (GHG) emissions by attributing them to corporate parent companies. By allocating each facility’s emissions based on its ownership stakes, the analysis provides a comprehensive measure of corporate carbon responsibility. For more detailed information on the EPA’s Greenhouse Gas Reporting Program (GHGRP), please visit the EPA’s website or consult the EPA helpdesk FAQ. Additional documentation on the methodology and context for this analysis can be found in the project repository README file.

Corporate Emissions (National)

EPA GHGP Summary Data

This code block processes EPA GHGRP summary data files stored in the “Data/Raw/GHGP_data_summary_spreadsheets” directory. The data is sourced from Excel files for each year from 2016 to 2023 that contain multiple sheets (e.g., “Direct Point Emitters”, “Onshore Oil & Gas Prod.”, etc.). Although data is available from 2010 onward, note that the additional sheets—such as those for onshore oil & gas production, gathering & boosting, transmission pipelines, LDC - Direct Emissions, SF₆ from Electrical Equipment, and Suppliers—are not available before 2016. Consequently, the code loops over the specified years and sheets to combine the available data into a single list, with each element corresponding to a specific sheet type.

# Define years of interest (note: data for 2010-2015 may be incomplete)
years <- 2016:2023  

# List of sheet names to be read from each Excel file
sheets_to_read <- c(
  "Direct Point Emitters", 
  "Onshore Oil & Gas Prod.", 
  "Gathering & Boosting",
  "Transmission Pipelines", 
  "LDC - Direct Emissions",
  "SF6 from Elec. Equip.", 
  "Suppliers"
)

# Set the directory where GHGRP summary spreadsheets are stored
local_data_dir <- here("Data", "Raw", "EPA_GHGRP_Data_Summary_Spreadsheets")

# Initialize an empty list to store combined data for each sheet type
GHGP_combined_data <- setNames(vector("list", length(sheets_to_read)), sheets_to_read)

# Loop through each year
for (year in years) {
  file_name <- paste0("ghgp_data_", year, ".xlsx")
  file_path <- file.path(local_data_dir, file_name)

  # Check if file exists; if not, skip the year
  if (!file.exists(file_path)) {
    message("File not found for year ", year, ": ", file_path)
    next
  }

  # Get the list of sheets available in the current file
  available_sheets <- excel_sheets(file_path)

  # Loop through each desired sheet
  for (sheet in sheets_to_read) {
    if (sheet %in% available_sheets) {
      message("Reading ", sheet, " for year ", year)
      # Attempt to read the sheet; skip the first 3 rows (adjust as needed)
      sheet_data <- tryCatch({
        read_excel(file_path, sheet = sheet, skip = 3, col_types = "text") %>%
          clean_names() %>%                          # Standardize column names
          mutate(year = year, .before = 1,
                 facility_name = str_replace_all(facility_name, "[[:punct:]]", ""),
                 facility_name = str_remove(facility_name, "[-/(].*")) %>% 
          mutate(across(everything(), toupper)) %>% 
          mutate(across(everything(), str_squish))
      }, error = function(e) {
        message("Failed to read sheet: ", sheet, " in year ", year, " — ", e$message)
        NULL
      })

      # If the sheet was successfully read, combine it with any previous data
      if (!is.null(sheet_data)) {
        GHGP_combined_data[[sheet]] <- bind_rows(GHGP_combined_data[[sheet]], sheet_data)
      }
    } else {
      message("Sheet not found: ", sheet, " in year ", year)
    }
  }
}

# Print the combined data for verification
head(GHGP_combined_data$`Direct Point Emitters`) %>% gt() %>% gt::fmt_auto()
year facility_id frs_id facility_name city state zip_code address county latitude longitude primary_naics_code industry_type_subparts industry_type_sectors total_reported_direct_emissions co2_emissions_non_biogenic methane_ch4_emissions nitrous_oxide_n2o_emissions hfc_emissions pfc_emissions sf6_emissions nf3_emissions other_fully_fluorinated_ghg_emissions hfe_emissions very_short_lived_compounds_emissions other_gh_gs_metric_tons_co2e biogenic_co2_emissions_metric_tons stationary_combustion electricity_generation adipic_acid_production aluminum_production ammonia_manufacturing cement_production electronics_manufacture ferroalloy_production fluorinated_ghg_production glass_production hcfc_22_production_from_hfc_23_destruction hydrogen_production iron_and_steel_production lead_production lime_production magnesium_production miscellaneous_use_of_carbonates nitric_acid_production petroleum_and_natural_gas_systems_offshore_production petroleum_and_natural_gas_systems_processing petroleum_and_natural_gas_systems_transmission_compression petroleum_and_natural_gas_systems_underground_storage petroleum_and_natural_gas_systems_lng_storage petroleum_and_natural_gas_systems_lng_import_export petrochemical_production petroleum_refining phosphoric_acid_production pulp_and_paper_manufacturing silicon_carbide_production soda_ash_manufacturing titanium_dioxide_production underground_coal_mines zinc_production municipal_landfills industrial_wastewater_treatment manufacture_of_electric_transmission_and_distribution_equipment industrial_waste_landfills is_some_co2_collected_on_site_and_used_to_manufacture_other_products_and_therefore_not_emitted_from_the_affected_manufacturing_process_unit_s_as_reported_under_subpart_g_or_s is_some_co2_reported_as_emissions_from_the_affected_manufacturing_process_unit_s_under_subpart_aa_g_or_p_collected_and_transferred_off_site_or_injected_as_reported_under_subpart_pp does_the_facility_employ_continuous_emissions_monitoring
2018 1004377.0 110043803578 121 REGIONAL DISPOSAL FACILITY MELISSA TX 75454 3820 SAM RAYBURN HIGHWAY COLLIN COUNTY 33.29857 -96.53586 562212 HH WASTE 653854.0 NA 653854.0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 653854.0 NA NA NA N N N
2018 1010040.0 110071159872 15185651518662 HAZARD KY 40701 1021 TORI DRIVE PERRY COUNTY 37.274127 -83.239034 212112 FF OTHER 125981.75 NA 125981.75 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 125981.75 NA NA NA NA NA N N N
2018 1010085.0 110071159657 1519015 HAZARD KY 41701 1845 S. KY HWY 15 PERRY COUNTY 37.236617 -83.18126 212112 FF OTHER 93918.75 NA 93918.75 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 93918.75 NA NA NA NA NA N N N
2018 1001155.0 110070942964 1500 SOUTH TIBBS LLC DBA AURORIUM INDIANAPOLIS LLC INDIANAPOLIS IN 46242 1500 SOUTH TIBBS AVENUE MARION COUNTY 39.73 -86.26 325199 C CHEMICALS 72163.662 72064.6 41.25 57.812 NA NA NA NA NA NA NA NA 4771.1 72163.662 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA N N N
2018 1000112.0 110071159492 23RD AND 3RD BROOKLYN NY 11232 730 3RD AVENUE KINGS 40.663 -74.0 221112 C,D POWER PLANTS 70705.288 70633.5 32.75 39.038 NA NA NA NA NA NA NA NA NA 250.1 70455.188 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA N N N
2018 1003742.0 110038508335 31ST STREET LANDFILL WESTCHESTER IL 60154 11700 W 31ST ST COOK COUNTY 41.834962 -87.916392 562212 C,HH WASTE 31220.15 168.4 31051.75 NA NA NA NA NA NA NA NA NA NA 168.4 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 31051.75 NA NA NA N N N
head(GHGP_combined_data$Suppliers) %>% gt() %>% gt::fmt_auto()
year facility_id frs_id facility_name city state zip_code address county latitude longitude primary_naics_code industry_type_subparts ghg_quantity_associated_with_coal_based_liquid_fuel_production ghg_quantity_associated_with_petroleum_products_produced ghg_quantity_associated_with_petroleum_products_imported ghg_quantity_associated_with_petroleum_products_exported ghg_quantity_associated_with_natural_gas_supply ghg_quantity_associated_with_natural_gas_liquids_supply ghg_quantity_associated_with_co2_supply
2016 1012147 NA 17Z GAS PLANT CHEVRON USA INC MCKITTRICK CA 93251 22845 HIGHWAY 33 NA 35.318187100000003 -119.6338842 211112 C,NN-FRAC,W-PROC NA NA NA NA NA 162409.2 NA
2016 1002574 110070834967 3M COMPANY ST PAUL MN 55144 3M CENTER RAMSEY COUNTY 45.032510000000002 -93.094190999999995 339999 OO NA NA NA NA NA NA NA
2016 1004206 110000367567 3M COMPANY DECATUR AL 35609 1400 STATE DOCKS RD. MORGAN COUNTY 34.641666999999998 -87.038611000000003 325211 C,L,OO NA NA NA NA NA NA NA
2016 1006665 110013886875 3M CORDOVA CORDOVA IL 61242 22614 ROUTE 84 NORTH ROCK ISLAND COUNTY 41.755000000000003 -90.284166999999997 325998 C,L,OO NA NA NA NA NA NA NA
2016 1004836 110000423667 3M COTTAGE GROVE CENTER SITE COTTAGE GROVE MN 55016 10746 INNOVATION RD WASHINGTON COUNTY 44.789444000000003 -92.908332999999999 325998 C,L,N,OO NA NA NA NA NA NA NA
2016 1004761 110013812918 ACE ETHANOL LLC STANLEY WI 54768 815 WEST MAPLE STREET CHIPPEWA COUNTY 44.9589 -90.960800000000006 325193 C,PP NA NA NA NA NA NA CONFIDENTIAL
#print(head(GHGP_combined_data$`Onshore Oil & Gas Prod.`))
#print(head(GHGP_combined_data$`Gathering & Boosting`))
#print(head(GHGP_combined_data$`Transmission Pipelines`))
#print(head(GHGP_combined_data$`LDC - Direct Emissions`))
#print(head(GHGP_combined_data$`SF6 from Elec. Equip.`))

NAICS Code Data

This section loads the NAICS Codes data, which provides industry classification codes and titles. This information is essential for contextualizing the sectors in which the facilities operate, supporting further analysis. The NAICS codes file is obtained from the Census website. This data will be merged with the EPA emissions data to assign industry titles to each facility based on its NAICS code.

# Define local path using `here`
naics_local_path <- here("Data", "Raw", "NAICS_Codes", "6-digit_2022_Codes.xlsx")

# Read the file, skipping any third column (like notes or descriptions)
naics <- read_excel(
  naics_local_path,
  col_names = TRUE,
  col_types = c('numeric', 'text', 'skip')  # skip third column if present
)

# Select and clean only the relevant columns
naics <- naics %>%
  select(`2022 NAICS Code`, `2022 NAICS Title`) %>%
  slice(-1) # remove first row it's blank

# Create a manual patch table (for missing naics codes in the ghgp)
naics_patch <- tribble(
  ~`2022 NAICS Code`, ~`2022 NAICS Title`,
  211112, "Natural Gas Liquid Extraction",
  325188, "All Other Basic Inorganic Chemical Manufacturing",
  325192, "Other Synthetic Organic Dye and Pigment Manufacturing",
  333999, "All Other Miscellaneous General Purpose Machinery Manufacturing",
  335222, "Household Refrigerator and Home Freezer Manufacturing",
  336111, "Automobile Manufacturing",
  441310, "Automotive Parts and Accessories Stores"
)

# Bind the patch table to the main NAICS data
naics <- bind_rows(naics, naics_patch) %>% 
  mutate(across(everything(), toupper)) %>% 
  mutate(`2022 NAICS Code` = as.numeric(`2022 NAICS Code`))

# Print
head(naics) %>% gt() %>% gt::fmt_auto()
2022 NAICS Code 2022 NAICS Title
111,110  SOYBEAN FARMING
111,120  OILSEED (EXCEPT SOYBEAN) FARMING
111,130  DRY PEA AND BEAN FARMING
111,140  WHEAT FARMING
111,150  CORN FARMING
111,160  RICE FARMING

GHGP Parent Company Data

This section loads the EPA GHGRP Parent Company Data file available from the EPA.It contains information on the parent companies that own (or partially own) facilities reporting GHG emissions, including the percentage of ownership.Combining data across multiple years (2016-2023) allows us to link facility-level emissions with ownership details, enabling the attribution of emissions to corporate entities.

# Define path
parent_company_path <- here("Data", "Raw", "EPA_GHGRP_Data_Parent_Company", "ghgp_data_parent_company.xlsb")

# Confirmed sheet names (adjust if needed)
sheet_names <- c("2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023")

# Initialize progress bar
pb <- progress_bar$new(total = length(sheet_names), format = "Reading sheet :sheet [:bar] :percent")

# Read each sheet with error handling and progress update
parent_company_data_combined <- map(sheet_names, function(sheet) {
  pb$tick(tokens = list(sheet = sheet))
  tryCatch(
    read_xlsb(parent_company_path, sheet = sheet),
    error = function(e) {
      message("Failed to read sheet: ", sheet)
      NULL
    }
  )
})

# Combine all sheets in the list into one dataframe
parent_company_data_full <- bind_rows(parent_company_data_combined, .id = "year_index")

# Convert year column to character for merge later
parent_company_data <- parent_company_data_full %>%
  clean_names() %>% 
  mutate(across(everything(), toupper)) %>%
  mutate(facility_name = str_remove(facility_name, "[-/(].*"),
         facility_name = str_replace_all(facility_name, "[[:punct:]]", ""),
         parent_company_name = str_replace_all(parent_company_name, "[[:punct:]]", "")) %>% 
  mutate(across(everything(), str_squish)) %>% 
  mutate(Facility = as.character(ghgrp_facility_id),
         Year = as.numeric(reporting_year),
         parent_co_percent_ownership = as.numeric(parent_co_percent_ownership),
         .keep="unused")

# Preview the first few rows of the combined NAICS codes data.
#glimpse(parent_company_data) 

Emissions Data Wrangling

The emissions data are presented in several different sections of EPA’s spreadsheet, so here we compile them all together. Emissions are measured in metric tons of CO2 equivalent, or tCO₂e (see: https://www.theguardian.com/environment/2011/apr/27/co2e-global-warming-potential).

Direct Emitter Facilities

This section processes the EPA GHGRP emissions data for direct emitter facilities. The data is sourced from multiple spreadsheets (e.g., “Direct Point Emitters”, “Onshore Oil & Gas Prod.”, etc.) contained within the GHGRP summary files. Each sheet contains different emission metrics measured in metric tons of CO2 equivalent (tCO₂e). The code begins by defining a mapping of sheet names to their corresponding column names for emissions data, ensuring that the correct fields are extracted from each file. It then initializes an empty data frame to compile data across all sheets. For each sheet, the code selects key variables such as facility ID, facility name, NAICS code, and the specified emissions metric, converting the emissions values to numeric format. After compiling the data, it aggregates entries for duplicate facility IDs by summing their emissions. Next, the code merges this aggregated data with the NAICS codes data—sourced from the Census—to assign industry titles to each facility based on its NAICS code. Finally, the data is split by year, and for each year the top 10 direct emitters are displayed using the gt package to generate formatted tables. This structured approach not only cleans and aggregates the emissions data but also contextualizes it by industry, facilitating a clear, year-by-year comparison of facility emissions.

# Correct list of emission field names with the accurate column names from your data frames
emissions_field_names <- list(
  "Direct Point Emitters" = "total_reported_direct_emissions",
  "Onshore Oil & Gas Prod." = "total_reported_emissions_from_onshore_oil_gas_production",  # Corrected name
  "Gathering & Boosting" = "total_reported_emissions_from_gathering_boosting",             # Corrected name
  "Transmission Pipelines" = "total_reported_direct_emissions_from_transmission_pipelines",
  "LDC - Direct Emissions" = "total_reported_direct_emissions_from_local_distribution_companies",
  "SF6 from Elec. Equip." = "total_reported_direct_emissions_from_electrical_equipment_use"
)

# Create an empty data frame to store emissions data compiled from direct emitters
fac_directemitters_data <- data.frame(
  Facility = character(),
  Name = character(),
  `2022 NAICS Code` = character(),
  `Emissions (tCO₂e)` = numeric(),
  Year = character(),
  stringsAsFactors = FALSE
)

# Compile emissions data across direct emitters for CO2 and NOₓ
for (key in names(emissions_field_names)) {
  dump <- GHGP_combined_data[[key]] %>%
    dplyr::select(facility_id, facility_name, primary_naics_code, year, all_of(emissions_field_names[[key]])) %>%
    rename(
      Facility = facility_id,
      Name = facility_name,
      `2022 NAICS Code` = primary_naics_code,
      Year = year,
      `Emissions (tCO₂e)` = all_of(emissions_field_names[[key]])
    )

  dump$`Emissions (tCO₂e)` <- as.numeric(dump$`Emissions (tCO₂e)`)

  fac_directemitters_data <- bind_rows(fac_directemitters_data, dump)
}

# Aggregate duplicate facility IDs
fac_directemitters_data <- fac_directemitters_data %>%
  group_by(Year, Facility, Name, `2022 NAICS Code`) %>%
  summarise(`Emissions (tCO₂e)` = sum(`Emissions (tCO₂e)`, na.rm = TRUE), .groups = "drop")

# Join with NAICS data
fac_directemitters_data  <- fac_directemitters_data  %>%
  mutate(`2022 NAICS Code` = as.numeric(`2022 NAICS Code`)) %>% 
    left_join(naics, by = "2022 NAICS Code")

fac_directemitters_data_table <- fac_directemitters_data %>% 
  select(-`2022 NAICS Code`, -Facility) %>% 
  rename(`NAICS Title` = `2022 NAICS Title`) %>% 
  arrange(desc(`Emissions (tCO₂e)`))

# Split the full data frame by year
fac_directemitters_by_year <- split(fac_directemitters_data_table, fac_directemitters_data_table$Year) 

# For each year, create and render a GT table showing the top 10 direct emitter parent companies
lapply(fac_directemitters_by_year, function(df_year) {
  current_year <- unique(df_year$Year)
  df_year %>%
    arrange(desc(`Emissions (tCO₂e)`)) %>%
    slice_head(n = 10) %>% 
    gt() %>%
    tab_header(
      title = paste("EPA GHGRP Direct Emitters –", current_year),
      subtitle = "Top 10 Parent Companies by Emissions (tCO₂e)"
    ) %>%
    fmt_number(
      columns = vars(`Emissions (tCO₂e)`),
      decimals = 0,
      use_seps = TRUE
    ) %>%
    cols_width(
      Name ~ px(300),
      `NAICS Title` ~ px(300),
      `Emissions (tCO₂e)` ~ px(200)
    ) %>%
    cols_align(
      align = "center",
      columns = everything()
    )
})
$2016
EPA GHGRP Direct Emitters – 2016
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2016 HILCORP NORTH SLOPE BASIN 890GB FACILITY 3,274,432 NA
2016 SAN JUAN BASIN 580 2,759,591 NA
2016 SAN JOAQUIN BASIN GATHERING BOOSTING CHEVRON USA INC 2,750,199 NA
2016 COG OPERATING LLC 430 PERMIAN BASIN 2,443,823 NA
2016 EP ENERGY EP 220 GULF COAST BASIN GB 2,034,697 NA
2016 DCP MIDSTREAM 430 PERMIAN BASIN 1,928,560 NA
2016 ARKOMA BASIN BG 1,817,086 NA
2016 WILLIAMS GB FACILITY APPALACHIAN BASIN EASTERN OVERTHRUST AREA 160A 1,757,937 SUPPORT ACTIVITIES FOR OIL AND GAS OPERATIONS
2016 DCP MIDSTREAM 360 ANADARKO BASIN 1,740,508 NA
2016 CRI 395 WILLISTON 1,674,608 NA
$2017
EPA GHGRP Direct Emitters – 2017
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2017 HILCORP NORTH SLOPE BASIN 890GB FACILITY 3,726,705 NA
2017 SAN JOAQUIN BASIN GATHERING BOOSTING CHEVRON USA INC 2,486,522 CRUDE PETROLEUM EXTRACTION 
2017 SAN JUAN BASIN 580 2,224,914 NA
2017 DGC ARKOMA BG 1,875,182 NA
2017 WILLIAMS GB FACILITY APPALACHIAN BASIN EASTERN OVERTHRUST AREA 160A 1,869,197 SUPPORT ACTIVITIES FOR OIL AND GAS OPERATIONS
2017 DCP MIDSTREAM 430 PERMIAN BASIN 1,841,403 NATURAL GAS EXTRACTION
2017 430 PERMIAN BASIN 1,640,321 NA
2017 CRI 395 WILLISTON 1,625,650 NA
2017 DCP MIDSTREAM 360 ANADARKO BASIN 1,606,662 NATURAL GAS EXTRACTION
2017 WHITING OIL AND GAS CORPORATION 395 WILLISTON BASIN 1,504,611 NA
$2018
EPA GHGRP Direct Emitters – 2018
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2018 JAMES H MILLER JR 18,429,639 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 SCHERER 16,695,176 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 MONROE 16,400,875 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 GIBSON 16,325,781 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 MARTIN LAKE 14,870,599 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 LABADIE 14,846,788 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 W A PARISH 14,620,769 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 GEN J M GAVIN 14,472,667 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 NAVAJO GENERATING STATION 13,960,706 FOSSIL FUEL ELECTRIC POWER GENERATION
2018 BOWEN 13,437,167 FOSSIL FUEL ELECTRIC POWER GENERATION
$2019
EPA GHGRP Direct Emitters – 2019
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2019 JAMES H MILLER JR 19,169,743 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 MONROE 15,144,724 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 COLSTRIP 14,277,559 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 LABADIE 14,180,190 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 SCHERER 13,764,419 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 GEN J M GAVIN 13,044,750 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 W A PARISH 12,862,472 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 PRAIRIE STATE GENERATING STATION 12,737,155 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 MARTIN LAKE 12,704,073 FOSSIL FUEL ELECTRIC POWER GENERATION
2019 OAK GROVE 12,336,777 FOSSIL FUEL ELECTRIC POWER GENERATION
$2020
EPA GHGRP Direct Emitters – 2020
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2020 JAMES H MILLER JR 17,232,898 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 LABADIE 15,710,653 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 GEN J M GAVIN 13,788,223 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 MARTIN LAKE 13,518,397 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 OAK GROVE 13,103,472 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 MONROE 13,059,155 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 PRAIRIE STATE GENERATING STATION 11,950,052 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 GIBSON 11,391,066 FOSSIL FUEL ELECTRIC POWER GENERATION
2020 EXXONMOBIL BT SITE 11,308,233 PETROLEUM REFINERIES
2020 JIM BRIDGER 11,225,008 FOSSIL FUEL ELECTRIC POWER GENERATION
$2021
EPA GHGRP Direct Emitters – 2021
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2021 JAMES H MILLER JR 20,998,639 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 LABADIE 15,760,177 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 MONROE 14,379,178 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 W A PARISH 13,911,354 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 MARTIN LAKE 13,515,092 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 GEN J M GAVIN 13,478,316 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 OAK GROVE 12,617,336 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 PRAIRIE STATE GENERATING STATION 12,496,789 FOSSIL FUEL ELECTRIC POWER GENERATION
2021 EXXONMOBIL BT SITE 11,811,121 PETROLEUM REFINERIES
2021 JOHN E AMOS 11,528,677 FOSSIL FUEL ELECTRIC POWER GENERATION
$2022
EPA GHGRP Direct Emitters – 2022
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2022 JAMES H MILLER JR 21,775,440 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 LABADIE 15,860,759 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 MONROE 14,908,126 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 MARTIN LAKE 13,330,423 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 OAK GROVE 12,697,798 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 EXXONMOBIL BT SITE 12,611,929 PETROLEUM REFINERIES
2022 W A PARISH 12,436,232 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 HARRISON POWER STATION 11,694,164 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 LARAMIE RIVER 11,524,663 FOSSIL FUEL ELECTRIC POWER GENERATION
2022 GEN J M GAVIN 11,298,607 FOSSIL FUEL ELECTRIC POWER GENERATION
$2023
EPA GHGRP Direct Emitters – 2023
Top 10 Parent Companies by Emissions (tCO₂e)
Year Name Emissions (tCO₂e) NAICS Title
2023 JAMES H MILLER JR 16,558,381 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 LABADIE 15,388,715 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 GEN J M GAVIN 13,451,174 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 MARTIN LAKE 12,787,949 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 EXXONMOBIL BT SITE 12,693,547 PETROLEUM REFINERIES
2023 OAK GROVE 12,278,594 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 PRAIRIE STATE GENERATING STATION 11,456,295 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 MONROE 11,413,347 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 HARRISON POWER STATION 11,179,958 FOSSIL FUEL ELECTRIC POWER GENERATION
2023 LARAMIE RIVER 10,979,532 FOSSIL FUEL ELECTRIC POWER GENERATION

Supplier Facilities

This section processes the supplier facilities data from the EPA GHGRP. It begins by converting various text-based emission fields—such as those for coal-based liquid fuel production and petroleum products—into numeric values, while treating any missing or confidential values as zeros. The code then renames and selects the key columns (facility ID, facility name, NAICS code, emissions, and year) to ensure consistency with the final output format. Next, the supplier data is merged with NAICS Codes to attach industry classification titles to each facility. Finally, the data is organized by year, and for each year the top 10 supplier facilities (sorted by emissions) are displayed using the gt package to create interactive, well-formatted tables.

# Convert character columns to numeric for the necessary emission fields,
# treating NAs and confidential values as zeros.
Suppliers <- GHGP_combined_data$Suppliers %>%
  mutate(
    `Emissions (tCO₂e)` = coalesce(
      as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_coal_based_liquid_fuel_production)), 0) +
      coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_petroleum_products_produced)), 0) +
      coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_petroleum_products_imported)), 0) +
      coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_petroleum_products_exported)), 0) +
      coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_natural_gas_supply)), 0) +
      coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_natural_gas_liquids_supply)), 0) +
      coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_co2_supply)), 0)
  )

# Rename columns to align with the final output format for suppliers
fac_suppliers_data <- Suppliers %>%
  mutate(
    primary_naics_code = as.numeric(primary_naics_code)
  ) %>%
  rename(
    Facility = facility_id, 
    Name = facility_name,
    `2022 NAICS Code` = primary_naics_code, 
    Year = year
  ) %>%
  select(Facility, Name, `2022 NAICS Code`, `Emissions (tCO₂e)`, Year)

# Merge supplier data with NAICS codes to include industry titles
fac_suppliers_data <- fac_suppliers_data %>%
  left_join(naics, by = "2022 NAICS Code")

# Prepare a presentation table by removing redundant columns and renaming for clarity
fac_suppliers_data_table <- fac_suppliers_data %>% 
  select(-`2022 NAICS Code`, -Facility) %>% 
  rename(`NAICS Title` = `2022 NAICS Title`) %>% 
  arrange(desc(`Emissions (tCO₂e)`))

# Split the data by year for per-year reporting
fac_suppliers_by_year <- split(fac_suppliers_data_table, fac_suppliers_data_table$Year) 

# For each year, create and render a GT table showing the top 10 supplier parent companies
lapply(fac_suppliers_by_year, function(df_year) {
  current_year <- unique(df_year$Year)
  df_year %>%
    arrange(desc(`Emissions (tCO₂e)`)) %>%
    slice_head(n = 10) %>% 
    gt() %>%
    tab_header(
      title = paste("EPA GHGP Suppliers –", current_year),
      subtitle = "Top 10 Parent Companies by Emissions (tCO₂e)"
    ) %>%
    fmt_number(
      columns = vars(`Emissions (tCO₂e)`),
      decimals = 0,
      use_seps = TRUE
    ) %>%
    cols_width(
      Name ~ px(300),
      `NAICS Title` ~ px(300),
      `Emissions (tCO₂e)` ~ px(200)
    ) %>%
    cols_align(
      align = "center",
      columns = everything()
    )
})
$2016
EPA GHGP Suppliers – 2016
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
MOTIVA ENTERPRISES LLC 85,411,041 2016 PETROLEUM REFINERIES
GARYVILLE REFINERY 83,342,023 2016 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 77,118,287 2016 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 73,764,091 2016 PETROLEUM REFINERIES
VALERO MARKETING AND SUPPLY COMPANY 68,788,211 2016 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
GALVESTON BAY REFINERY 63,711,477 2016 PETROLEUM REFINERIES
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 57,816,931 2016 PETROLEUM REFINERIES
CHEVRON MM PASCAGOULA 53,184,465 2016 PETROLEUM REFINERIES
TESORO REFINING MARKETING CARSON REFINERY 52,600,641 2016 PETROLEUM REFINERIES
BP WHITING BUSINESS UNIT 48,341,410 2016 PETROLEUM REFINERIES
$2017
EPA GHGP Suppliers – 2017
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
MOTIVA ENTERPRISES LLC 89,561,871 2017 PETROLEUM REFINERIES
GARYVILLE REFINERY 85,165,684 2017 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 74,986,475 2017 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 70,977,274 2017 PETROLEUM REFINERIES
GALVESTON BAY REFINERY 65,855,431 2017 PETROLEUM REFINERIES
VALERO MARKETING AND SUPPLY COMPANY 59,095,885 2017 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
BP WHITING BUSINESS UNIT 55,259,607 2017 PETROLEUM REFINERIES
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 54,640,332 2017 PETROLEUM REFINERIES
CHEVRON MM PASCAGOULA 54,539,051 2017 PETROLEUM REFINERIES
TESORO REFINING MARKETING CARSON REFINERY 51,979,688 2017 PETROLEUM REFINERIES
$2018
EPA GHGP Suppliers – 2018
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
MOTIVA ENTERPRISES LLC 90,855,626 2018 PETROLEUM REFINERIES
GARYVILLE REFINERY 87,937,165 2018 PETROLEUM REFINERIES
VALERO MARKETING AND SUPPLY COMPANY 83,328,500 2018 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
GALVESTON BAY REFINERY 75,976,955 2018 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 75,791,749 2018 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 74,443,926 2018 PETROLEUM REFINERIES
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 60,912,652 2018 PETROLEUM REFINERIES
BP WHITING BUSINESS UNIT 55,106,714 2018 PETROLEUM REFINERIES
PREMCOR REFINING GROUP INCORPORATED PORTARTHUR REFINERY 52,562,347 2018 PETROLEUM REFINERIES
CHEVRON MM PASCAGOULA 51,000,986 2018 PETROLEUM REFINERIES
$2019
EPA GHGP Suppliers – 2019
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
MOTIVA ENTERPRISES LLC 88,062,057 2019 PETROLEUM REFINERIES
GARYVILLE REFINERY 82,986,331 2019 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 77,569,433 2019 PETROLEUM REFINERIES
GALVESTON BAY REFINERY 77,378,042 2019 PETROLEUM REFINERIES
VALERO MARKETING AND SUPPLY COMPANY 72,076,497 2019 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
EXXONMOBIL BT SITE 68,729,170 2019 PETROLEUM REFINERIES
BP WHITING BUSINESS UNIT 58,054,768 2019 PETROLEUM REFINERIES
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 56,010,912 2019 PETROLEUM REFINERIES
CHEVRON MM PASCAGOULA 52,178,591 2019 PETROLEUM REFINERIES
TESORO REFINING MARKETING CARSON REFINERY 50,628,067 2019 PETROLEUM REFINERIES
$2020
EPA GHGP Suppliers – 2020
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
MOTIVA ENTERPRISES LLC 84,684,302 2020 PETROLEUM REFINERIES
GARYVILLE REFINERY 84,389,430 2020 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 76,148,788 2020 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 69,760,075 2020 PETROLEUM REFINERIES
ENTERPRISE PRODUCTS OPERATING LLC 66,679,801 2020 MARINE CARGO HANDLING
GALVESTON BAY REFINERY 66,235,711 2020 PETROLEUM REFINERIES
VALERO MARKETING AND SUPPLY COMPANY 61,214,779 2020 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
BP WHITING BUSINESS UNIT 57,236,188 2020 PETROLEUM REFINERIES
MONT BELVIEU FRACTIONATOR 48,876,668 2020 NATURAL GAS EXTRACTION
TRAFIGURA TRADING LLC 48,701,114 2020 WHOLESALE TRADE AGENTS AND BROKERS
$2021
EPA GHGP Suppliers – 2021
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
VALERO MARKETING AND SUPPLY COMPANY 96,018,305 2021 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
GARYVILLE REFINERY 89,301,972 2021 PETROLEUM REFINERIES
MOTIVA ENTERPRISES LLC 88,602,296 2021 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 78,710,667 2021 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 72,620,394 2021 PETROLEUM REFINERIES
GALVESTON BAY REFINERY 67,327,296 2021 PETROLEUM REFINERIES
ENTERPRISE PRODUCTS OPERATING LLC 60,076,255 2021 MARINE CARGO HANDLING
BP WHITING BUSINESS UNIT 58,664,235 2021 PETROLEUM REFINERIES
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 52,352,915 2021 PETROLEUM REFINERIES
EXXONMOBIL BEAUMONT REFINERY 52,351,018 2021 PETROLEUM REFINERIES
$2022
EPA GHGP Suppliers – 2022
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
VALERO MARKETING AND SUPPLY COMPANY 169,436,028 2022 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
MOTIVA ENTERPRISES LLC 92,888,241 2022 PETROLEUM REFINERIES
GARYVILLE REFINERY 90,625,212 2022 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 86,689,540 2022 PETROLEUM REFINERIES
GALVESTON BAY REFINERY 79,286,393 2022 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 79,154,149 2022 PETROLEUM REFINERIES
ENTERPRISE PRODUCTS OPERATING LLC 64,534,515 2022 MARINE CARGO HANDLING
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 63,421,358 2022 PETROLEUM REFINERIES
BP WHITING BUSINESS UNIT 62,426,213 2022 PETROLEUM REFINERIES
TRAFIGURA TRADING LLC 61,825,661 2022 WHOLESALE TRADE AGENTS AND BROKERS
$2023
EPA GHGP Suppliers – 2023
Top 10 Parent Companies by Emissions (tCO₂e)
Name Emissions (tCO₂e) Year NAICS Title
VALERO MARKETING AND SUPPLY COMPANY 238,380,354 2023 PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK STATIONS AND TERMINALS)
GARYVILLE REFINERY 93,705,370 2023 PETROLEUM REFINERIES
MOTIVA ENTERPRISES LLC 91,983,868 2023 PETROLEUM REFINERIES
EXXONMOBIL BEAUMONT REFINERY 83,078,905 2023 PETROLEUM REFINERIES
EXXONMOBIL BT SITE 82,201,241 2023 PETROLEUM REFINERIES
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT 80,977,604 2023 PETROLEUM REFINERIES
ENTERPRISE PRODUCTS OPERATING LLC 72,902,210 2023 MARINE CARGO HANDLING
GALVESTON BAY REFINERY 71,172,513 2023 PETROLEUM REFINERIES
BP WHITING BUSINESS UNIT 65,956,897 2023 PETROLEUM REFINERIES
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX 64,197,374 2023 PETROLEUM REFINERIES

Calculating Corporate Emissions

In this section, we merge the emissions data from direct emitters and suppliers with the parent company information. This allows us to attribute each facility’s reported GHG emissions—measured in metric tons of CO2 equivalent (tCO₂e)—to its owning corporate entities based on their percentage ownership. The following code performs the necessary data type conversions, left joins, and filtering to prepare the data for further analysis.

# Convert relevant columns in parent_company_data to the appropriate types
parent_company_data <- parent_company_data %>%
  mutate(
    Facility = as.character(as.numeric(Facility)),
    Year = as.numeric(Year)
  )

# Merge direct emitter data with parent company information
parent_emitters <- fac_directemitters_data %>%
  mutate(
    Facility = as.character(as.numeric(Facility)),
    Year = as.numeric(Year)
  ) %>%
  left_join(parent_company_data, by = c("Facility", "Year")) %>% 
  filter(`Emissions (tCO₂e)` != 0)

# Merge supplier data with parent company information using a similar approach.
parent_suppliers <- fac_suppliers_data %>%
  mutate(
    Facility = as.character(as.numeric(Facility)),
    Year = as.numeric(Year)
  ) %>%
  left_join(parent_company_data, by = c("Facility", "Year")) %>% 
  filter(`Emissions (tCO₂e)` != 0)

# Display a sample of the merged data for verification.
#parent_emitters %>% glimpse() 
#parent_suppliers%>% glimpse() 

Next, we calculate each parent company’s responsibility for emissions based on their percentage ownership. For each facility, the reported emissions are multiplied by the company’s ownership percentage (divided by 100) to determine the share of emissions attributed to that company.

# For direct emitters:
epa_parent_company_emitters <- parent_emitters %>%
  group_by(Year) %>% 
  mutate(`Parent Company Emissions (tCO₂e)` = as.numeric(`Emissions (tCO₂e)`) *
           as.numeric(parent_co_percent_ownership) / 100) %>% 
  rename(`Parent Company` = parent_company_name)

# For supplier facilities:
epa_parent_company_suppliers <- parent_suppliers %>%
  group_by(Year) %>% 
  mutate(`Parent Company Emissions (tCO₂e)` = as.numeric(`Emissions (tCO₂e)`) *
           as.numeric(parent_co_percent_ownership) / 100) %>% 
  rename(`Parent Company` = parent_company_name)

write_csv(epa_parent_company_suppliers, here("Data", "Processed", "EPA_GHGP_Parent_Company_Suppliers.csv"))
write_csv(epa_parent_company_emitters, here("Data", "Processed", "EPA_GHGP_Parent_Company_Emitters.csv"))

Finally, we summarize the emissions data by parent company and year. The summarized data includes the total emissions attributed to each company and the number of facilities contributing to those emissions. Formatted tables are then generated for each year using the gt package, which creates interactive and publication-ready tables.

# Summarize emissions for direct emitters by parent company and year.
summarized_emitters <- epa_parent_company_emitters %>%
  group_by(`Parent Company`, Year) %>%
  summarise(
    `Parent Company Emissions (tCO₂e)` = sum(as.numeric(`Parent Company Emissions (tCO₂e)`), na.rm = TRUE),
    `# of Facilities` = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(`Parent Company Emissions (tCO₂e)`)) %>% 
  ungroup()

# Summarize emissions for supplier facilities by parent company and year.
suppliers_summarized <- epa_parent_company_suppliers %>%
  group_by(`Parent Company`, Year) %>%
  summarise(
    `Parent Company Emissions (tCO₂e)` = sum(as.numeric(`Parent Company Emissions (tCO₂e)`), na.rm = TRUE),
    `# of Facilities` = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(`Parent Company Emissions (tCO₂e)`)) %>% 
  ungroup()

# Define a function that creates a GT table for the top 10 companies per year.
# The function returns a list of GT tables, one per year.
print_parent_company_tables <- function(summary_df, title_prefix = "Emitters") {
  # Extract unique years from the dataset
  years <- sort(unique(summary_df$Year))
  # Initialize an empty list to store GT tables
  gt_tables <- list()
  
  # Loop over each year
  for (yr in years) {
    # Filter data for the current year and select top 10 companies by emissions
    gt_table <- summary_df %>%
      filter(Year == yr) %>%
      select(-Year) %>%           # Remove Year column for display
      slice_head(n = 10) %>%      # Select top 10 companies
      gt() %>%
      fmt_number(
        columns = vars(`Parent Company Emissions (tCO₂e)`),
        decimals = 0,
        use_seps = TRUE
      ) %>%
      cols_label(
        `Parent Company Emissions (tCO₂e)` = "Emissions (tCO₂e)",
        `# of Facilities` = "# Facilities"
      ) %>%
      cols_width(
        `Parent Company` ~ px(300),
        `Parent Company Emissions (tCO₂e)` ~ px(200),
        `# of Facilities` ~ px(150)
      ) %>%
      tab_header(
        title = paste(title_prefix, "–", yr)
      )
    
    # Add the table to the list using the year as the name
    gt_tables[[as.character(yr)]] <- gt_table
  }
  
  # Return the list of GT tables
  return(gt_tables)
}

# Generate formatted GT tables for direct emitters and supplier emissions.
direct_emitter_tables <- print_parent_company_tables(summarized_emitters, title_prefix = "EPA GHGP Corporate Direct Emitters")
supplier_tables <- print_parent_company_tables(suppliers_summarized, title_prefix = "EPA GHGP Corporate Supplier Emissions")

# In an R Markdown document, simply returning the list will render the tables.
direct_emitter_tables
$2016
EPA GHGP Corporate Direct Emitters – 2016
Parent Company Emissions (tCO₂e) # Facilities
EXXONMOBIL CORP 8,063,542 37
BP AMERICA INC 7,027,144 12
CONOCOPHILLIPS 6,876,850 15
ANADARKO PETROLEUM CORP 5,366,493 16
WILLIAMS PARTNERS LP 5,361,810 13
ENERGY TRANSFER PARTNERS LP 4,925,638 13
CHESAPEAKE ENERGY CORP 3,972,768 13
CHEVRON CORP 3,815,182 15
EOG RESOURCES INC 3,557,717 19
SOUTHWESTERN ENERGY CO 3,014,378 4
$2017
EPA GHGP Corporate Direct Emitters – 2017
Parent Company Emissions (tCO₂e) # Facilities
EXXONMOBIL CORP 7,007,264 33
BP AMERICA INC 6,458,902 12
WILLIAMS PARTNERS LP 5,480,890 13
HILCORP ENERGY CO 4,998,568 13
ENERGY TRANSFER PARTNERS LP 4,935,847 13
DCP MIDSTREAM LP 4,842,979 9
CONOCOPHILLIPS 4,087,293 13
EOG RESOURCES INC 3,853,076 18
ANADARKO PETROLEUM CORP 3,781,616 11
CHEVRON CORP 3,444,193 11
$2018
EPA GHGP Corporate Direct Emitters – 2018
Parent Company Emissions (tCO₂e) # Facilities
VISTRA ENERGY CORP 114,272,492 51
SOUTHERN CO 99,309,274 36
DUKE ENERGY CORP 97,383,677 47
AMERICAN ELECTRIC POWER 68,513,351 28
BERKSHIRE HATHAWAY INC 53,971,373 49
XCEL ENERGY 46,923,088 36
NRG ENERGY INC 39,287,627 31
CALPINE CORP 39,269,420 47
EXXONMOBIL CORP 39,268,814 88
ENTERGY CORP 37,726,617 26
$2019
EPA GHGP Corporate Direct Emitters – 2019
Parent Company Emissions (tCO₂e) # Facilities
VISTRA ENERGY CORP 102,761,542 49
THE SOUTHERN CO 87,487,053 63
DUKE ENERGY CORP 85,958,619 47
AMERICAN ELECTRIC POWER CO INC 70,020,857 31
BERKSHIRE HATHAWAY INC 64,518,982 96
NEXTERA ENERGY INC 44,607,595 30
XCEL ENERGY 43,084,156 36
EXXON MOBIL CORP 42,750,562 91
VOLT PARENT LP 40,168,527 44
ENTERGY CORP 36,294,210 27
$2020
EPA GHGP Corporate Direct Emitters – 2020
Parent Company Emissions (tCO₂e) # Facilities
VISTRA CORP 91,782,979 40
THE SOUTHERN CO 75,724,039 58
DUKE ENERGY CORP 75,471,450 47
BERKSHIRE HATHAWAY INC 57,581,146 129
AMERICAN ELECTRIC POWER CO INC 54,179,231 30
VOLT PARENT LP 44,574,110 43
NEXTERA ENERGY INC 42,552,839 31
EXXON MOBIL CORP 41,492,248 89
DOMINION ENERGY INC 36,006,719 47
XCEL ENERGY INC 35,750,399 35
$2021
EPA GHGP Corporate Direct Emitters – 2021
Parent Company Emissions (tCO₂e) # Facilities
VISTRA CORP 95,510,483 44
THE SOUTHERN CO 82,154,166 58
DUKE ENERGY CORP 78,135,150 48
BERKSHIRE HATHAWAY INC 63,872,118 129
AMERICAN ELECTRIC POWER CO INC 62,351,437 28
EXXON MOBIL CORP 43,484,952 91
VOLT PARENT LP 42,828,661 44
NEXTERA ENERGY INC 41,395,422 30
XCEL ENERGY INC 38,629,183 35
ENTERGY CORP 37,166,686 32
$2022
EPA GHGP Corporate Direct Emitters – 2022
Parent Company Emissions (tCO₂e) # Facilities
VISTRA CORP 92,454,741 43
THE SOUTHERN CO 83,987,686 56
DUKE ENERGY CORP 78,479,645 49
AMERICAN ELECTRIC POWER CO INC 58,686,889 27
BERKSHIRE HATHAWAY INC 58,241,154 129
EXXON MOBIL CORP 42,244,322 86
NEXTERA ENERGY INC 41,639,555 29
ENTERGY CORP 40,219,776 31
XCEL ENERGY INC 37,521,758 36
CPN MANAGEMENT LP 33,550,720 28
$2023
EPA GHGP Corporate Direct Emitters – 2023
Parent Company Emissions (tCO₂e) # Facilities
VISTRA CORP 86,320,993 40
THE SOUTHERN CO 76,718,005 54
DUKE ENERGY CORP 72,776,123 49
BERKSHIRE HATHAWAY INC 53,055,963 128
AMERICAN ELECTRIC POWER CO INC 48,971,843 26
CPN MANAGEMENT LP 47,703,517 44
NEXTERA ENERGY INC 42,867,570 27
ENTERGY CORP 38,142,109 30
EXXON MOBIL CORP 37,882,422 59
XCEL ENERGY INC 36,005,568 36
supplier_tables
$2016
EPA GHGP Corporate Supplier Emissions – 2016
Parent Company Emissions (tCO₂e) # Facilities
PHILLIPS 66 336,693,357 19
VALERO ENERGY CORP 320,675,752 15
MARATHON PETROLEUM CO LP 274,434,705 8
EXXONMOBIL CORP 264,232,481 14
SHELL OIL CO 179,811,952 11
CHEVRON CORP 161,409,117 7
PDV HOLDING INC 129,223,074 5
TESORO CORP 118,882,739 7
ENTERPRISE PRODUCTS PARTNERS LP 113,600,350 15
BP AMERICA INC 112,266,235 8
$2017
EPA GHGP Corporate Supplier Emissions – 2017
Parent Company Emissions (tCO₂e) # Facilities
PHILLIPS 66 351,616,213 17
VALERO ENERGY CORP 331,428,491 15
MARATHON PETROLEUM CO LP 278,381,915 8
EXXONMOBIL CORP 266,631,258 14
SHELL OIL CO 215,892,381 11
CHEVRON CORP 175,521,491 7
BP AMERICA INC 141,130,289 8
TESORO CORP 128,749,113 8
PDV HOLDING INC 121,841,178 5
ENTERPRISE PRODUCTS PARTNERS LP 121,388,975 15
$2018
EPA GHGP Corporate Supplier Emissions – 2018
Parent Company Emissions (tCO₂e) # Facilities
PHILLIPS 66 374,397,807 17
VALERO ENERGY CORP 367,804,281 15
MARATHON PETROLEUM CO LP 314,928,553 9
EXXONMOBIL CORP 287,284,557 13
SHELL OIL CO 172,409,795 10
CHEVRON CORP 169,072,637 6
PDV HOLDING INC 131,545,877 5
BP AMERICA INC 126,697,011 6
ENTERPRISE PRODUCTS PARTNERS LP 124,207,040 16
TESORO CORP 110,382,394 6
$2019
EPA GHGP Corporate Supplier Emissions – 2019
Parent Company Emissions (tCO₂e) # Facilities
MARATHON PETROLEUM CORP 462,298,283 20
PHILLIPS 66 375,433,574 17
VALERO ENERGY CORP 341,421,586 15
EXXON MOBIL CORP 283,974,088 13
SHELL PETROLEUM INC 182,902,796 9
CHEVRON CORP 173,927,432 5
BP AMERICA INC 145,701,026 6
ENTERPRISE PRODUCTS PARTNERS LP 135,147,774 16
PDV AMERICA INC 117,506,952 5
KOCH INDUSTRIES INC 99,170,496 4
$2020
EPA GHGP Corporate Supplier Emissions – 2020
Parent Company Emissions (tCO₂e) # Facilities
MARATHON PETROLEUM CORP 401,574,105 21
PHILLIPS 66 324,103,187 17
VALERO ENERGY CORP 298,505,939 15
EXXON MOBIL CORP 282,255,946 13
ENTERPRISE PRODUCTS PARTNERS LP 168,952,523 18
SHELL PETROLEUM INC 149,092,739 9
CHEVRON CORP 135,388,031 5
BP AMERICA INC 133,602,244 5
ARAMCO SERVICES CO 102,916,368 2
PDV AMERICA INC 94,924,513 5
$2021
EPA GHGP Corporate Supplier Emissions – 2021
Parent Company Emissions (tCO₂e) # Facilities
MARATHON PETROLEUM CORP 422,682,631 18
VALERO ENERGY CORP 356,721,973 15
PHILLIPS 66 339,053,077 16
EXXON MOBIL CORP 298,748,974 12
ENTERPRISE PRODUCTS PARTNERS LP 167,386,818 16
CHEVRON CORP 158,393,471 5
ARAMCO SERVICES CO 108,739,236 2
BP AMERICA INC 107,686,993 4
PDV AMERICA INC 106,156,281 5
KOCH INDUSTRIES INC 103,765,062 4
$2022
EPA GHGP Corporate Supplier Emissions – 2022
Parent Company Emissions (tCO₂e) # Facilities
MARATHON PETROLEUM CORP 442,905,790 17
VALERO ENERGY CORP 436,980,733 15
EXXON MOBIL CORP 327,048,178 13
PHILLIPS 66 318,555,900 16
ENTERPRISE PRODUCTS PARTNERS LP 178,531,580 16
CHEVRON CORP 154,340,013 6
PDV AMERICA INC 124,001,333 5
PBF ENERGY INC 120,232,669 7
BP AMERICA INC 107,083,164 5
KOCH INDUSTRIES INC 107,012,557 4
$2023
EPA GHGP Corporate Supplier Emissions – 2023
Parent Company Emissions (tCO₂e) # Facilities
VALERO ENERGY CORP 511,680,742 15
MARATHON PETROLEUM CORP 437,065,133 17
PHILLIPS 66 346,616,539 19
EXXON MOBIL CORP 341,759,108 10
ENTERPRISE PRODUCTS PARTNERS LP 196,807,286 17
CHEVRON CORP 164,470,509 5
BP AMERICA INC 142,190,500 5
PDV AMERICA INC 122,786,461 5
PBF ENERGY INC 107,787,144 7
KOCH INDUSTRIES INC 104,440,178 4

Visualizing Top Direct Emitters and Suppliers

This section defines a function that, for each year, filters the summarized emissions data, selects the top 10 parent companies based on their attributed emissions, and generates a horizontal bar chart using ggplot2. The resulting plots offer a clear visual comparison of corporate GHG emissions across different years for both direct emitters and suppliers.

plot_top_emitters <- function(data, title_prefix = "Emitters") {
  # Load the stringr package for text wrapping functionality
  library(stringr)
  
  # Define a custom palette of 10 distinct, saturated colors using RColorBrewer's "Paired" palette.
  custom_colors <- RColorBrewer::brewer.pal(10, "Paired")
  
  # Extract and sort the unique years present in the dataset
  years <- sort(unique(data$Year))
  
  # Loop through each year in the dataset
  for (yr in years) {
    # Filter data for the current year, sort by emissions in descending order,
    # select the top 10 companies, and reorder the parent company factor by emissions.
    df <- data %>%
      filter(Year == yr) %>%
      arrange(desc(`Parent Company Emissions (tCO₂e)`)) %>%
      slice_head(n = 10) %>%
      mutate(`Parent Company` = fct_reorder(`Parent Company`, `Parent Company Emissions (tCO₂e)`))
    
    # Generate a horizontal bar chart using ggplot2:
    # - geom_col() creates the bars with a fixed width.
    # - coord_flip() rotates the chart for better readability.
    # - scale_fill_manual() applies the custom color palette.
    # - scale_x_discrete() wraps long labels for clarity.
    # - scale_y_continuous() formats y-axis labels with commas.
    # - labs() sets the title, subtitle, and axis labels.
    # - theme_minimal() with additional theme adjustments ensures a clean presentation.
    p <- ggplot(df, aes(
      x = `Parent Company`,
      y = `Parent Company Emissions (tCO₂e)`,
      fill = `Parent Company`
    )) +
      geom_col(width = 0.6, show.legend = FALSE) +
      coord_flip() +
      scale_fill_manual(values = custom_colors) +
      scale_x_discrete(labels = function(x) str_wrap(x, width = 20)) +
      scale_y_continuous(labels = scales::comma) +
      labs(
        title = paste(title_prefix, "-", yr),
        subtitle = "Top 10 Parent Companies by Emissions",
        x = "Parent Company",
        y = "Emissions (tCO₂e)"
      ) +
      theme_minimal(base_size = 11) +
      theme(
        plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
        plot.subtitle = element_text(size = 11, hjust = 0.5),
        axis.title.x = element_text(face = "bold", size = 11),
        axis.title.y = element_text(face = "bold", size = 11),
        axis.text.y = element_text(size = 10),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_line(color = "grey80"),
        plot.background = element_rect(fill = "white", color = NA),
        panel.background = element_rect(fill = "white", color = NA)
      )
    
    # Print the plot for the current year
    print(p)
  }
}

# Example usage: Generate plots for both direct emitters and supplier emissions.
plot_top_emitters(summarized_emitters, title_prefix = "Direct Emitters")

plot_top_emitters(suppliers_summarized, title_prefix = "Supplier Emissions")

This function aggregates emission data by parent company across the available time period (2016-2013), selects the top 10 companies based on their total emissions, and generates a horizontal bar chart. The chart visually displays the total emissions attributed to each parent company, facilitating an academic-level comparison across the years.

# This function aggregates emissions by parent company across all years (2016–2023)

# for improved spacing between axis titles and the axis text.
plot_top_10_parent_companies <- function(data, title = "Top 10 Parent Companies (2016–2023)") {
  # Aggregate data: group by Parent Company and sum their emissions
  top10 <- data %>%
    group_by(`Parent Company`) %>%
    summarise(`Total Emissions (tCO₂e)` = sum(`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)) %>%
    arrange(desc(`Total Emissions (tCO₂e)`)) %>%
    slice_head(n = 10) %>%
    mutate(`Parent Company` = fct_reorder(`Parent Company`, `Total Emissions (tCO₂e)`))
  
  # Define a custom palette of 10 distinct, saturated colors using RColorBrewer's "Paired" palette.
  custom_colors <- RColorBrewer::brewer.pal(10, "Paired")
  
  # Create a horizontal bar chart using ggplot2 with refined aesthetics.
  p <- ggplot(top10, aes(x = `Parent Company`, y = `Total Emissions (tCO₂e)`, fill = `Parent Company`)) +
    geom_col(width = 0.6, show.legend = FALSE) +       # Fixed bar width, no legend.
    coord_flip() +                                      # Horizontal bars.
    scale_fill_manual(values = custom_colors) +         # Apply the custom color palette.
    scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 25)) +  # Wrap long labels.
    scale_y_continuous(labels = scales::comma) +        # Format y-axis labels with commas.
    labs(
      title = title,
      x = "Parent Company",
      y = "Total Emissions (tCO₂e)"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
      axis.title.x = element_text(face = "bold", size = 12, margin = margin(t = 10)),
      axis.title.y = element_text(face = "bold", size = 12, margin = margin(r = 10)),
      axis.text.y = element_text(size = 10),
      axis.text.x = element_text(size = 8),
      panel.grid.major.y = element_blank(),             # Remove horizontal grid lines.
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_line(color = "grey80"),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(t = 30, r = 30, b = 30, l = 30)
    )
  
  print(p)
}

# Generate publication-quality plots using the aggregated data across all years.
plot_top_10_parent_companies(summarized_emitters, title = "Top 10 Direct Emitters (2016–2023)")

plot_top_10_parent_companies(suppliers_summarized, title = "Top 10 Supplier Emissions (2016–2023)")

Share of Emissions by Corporations

This section calculates the percentage share of total emissions (in tCO₂e) accounted for by the top 50 parent companies for each year. The analysis is performed separately for direct emitters and suppliers, providing insights into the quantities of emissions among the largest corporate entities. The code filters the summarized data for each year, calculates the total emissions, selects the top 50 companies based on emissions, and computes the percentage share of emissions attributed to these companies. The results are displayed for each year, highlighting the contribution of the top 50 companies to the total emissions in the dataset.

# This code calculates the percentage share of total emissions (tCO₂e)  accounted for by the top 50 parent companies for each year, separately for direct emitters and suppliers.

# Calculate shares for Direct Emitters
direct_shares <- summarized_emitters %>%
  group_by(Year) %>%
  nest() %>%
  mutate(
    Total_Emissions = map_dbl(data, ~ sum(.x$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
    Top10_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 10)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
    Top50_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 50)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE))
  ) %>%
  mutate(
    `Top 10 Share (%)` = round(Top10_Emissions / Total_Emissions * 100, 1),
    `Top 50 Share (%)` = round(Top50_Emissions / Total_Emissions * 100, 1)
  ) %>%
  select(Year, `Top 10 Share (%)`, `Top 50 Share (%)`) %>% 
  ungroup() %>% 
  arrange(desc(Year))

# Display a GT table for Direct Emitters
direct_shares %>% 
  gt() %>%
  tab_header(
    title = "Direct Emitters: Percentage Share by Top Companies",
    subtitle = "Percentage share of total emissions accounted for by the top 10 and top 50 companies"
  ) %>%
  fmt_percent(
    columns = c(`Top 10 Share (%)`, `Top 50 Share (%)`),
    decimals = 1,
    scale_values = FALSE
  ) 
Direct Emitters: Percentage Share by Top Companies
Percentage share of total emissions accounted for by the top 10 and top 50 companies
Year Top 10 Share (%) Top 50 Share (%)
2023 21.3% 49.1%
2022 21.3% 49.2%
2021 21.9% 50.1%
2020 21.6% 49.3%
2019 22.0% 49.7%
2018 21.7% 48.8%
2017 25.9% 63.3%
2016 28.4% 66.4%
# Calculate shares for Suppliers
suppliers_shares <- suppliers_summarized %>%
  group_by(Year) %>%
  nest() %>%
  mutate(
    Total_Emissions = map_dbl(data, ~ sum(.x$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
    Top10_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 10)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
    Top50_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 50)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE))
  ) %>%
  mutate(
    `Top 10 Share (%)` = round(Top10_Emissions / Total_Emissions * 100, 1),
    `Top 50 Share (%)` = round(Top50_Emissions / Total_Emissions * 100, 1)
  ) %>%
  select(Year, `Top 10 Share (%)`, `Top 50 Share (%)`) %>% 
  ungroup() %>% 
  arrange(desc(Year))

# Display a GT table for Suppliers
suppliers_shares %>% 
  gt() %>%
  tab_header(
    title = "Suppliers: Percentage Share by Top Companies",
    subtitle = "Percentage share of total emissions accounted for by the top 10 and top 50 companies"
  ) %>%
  fmt_percent(
    columns = c(`Top 10 Share (%)`, `Top 50 Share (%)`),
    decimals = 1,
    scale_values = FALSE
  )
Suppliers: Percentage Share by Top Companies
Percentage share of total emissions accounted for by the top 10 and top 50 companies
Year Top 10 Share (%) Top 50 Share (%)
2023 53.2% 87.0%
2022 51.3% 86.5%
2021 52.0% 86.8%
2020 51.6% 86.7%
2019 52.5% 86.4%
2018 50.0% 85.1%
2017 51.0% 85.3%
2016 49.3% 83.7%

The top 50 direct emitters are responsible for ~50% share of total emissions. The top 10 suppliers account for ~85% of total emissions. This analysis provides insights about the relative contributions of the largest corporate entities to overall emissions.

Social Cost of Corporate Emissions

This section quantifies the Social Cost of Carbon (SCC) for each corporate parent by applying a predetermined SCC value (in USD per metric ton of CO2 equivalent) to the reported emissions. The SCC represents the present value of the future damages caused by one additional ton of CO2, encompassing impacts such as reduced agricultural productivity, adverse health outcomes, and increased infrastructure damage from extreme weather events. Seminal works like “The ‘Social Cost of Carbon’ Made Simple” (Newbold et al., 2010) and subsequent studies illustrate that the SCC captures the comprehensive economic cost of climate change. In this analysis, each corporate parent’s emissions (in tCO₂e) are multiplied by an EPA estimated SCC value—allowing for adjustments based on evolving scientific and economic estimates.

Direct Emitter Facilities

# Aggregate direct emitter data across all years by summing emissions per parent company.
direct_total <- summarized_emitters %>%
  group_by(`Parent Company`) %>%
  summarise(Total_Emissions = sum(`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)) %>%
  ungroup()

# Select the top 50 companies by total emissions.
top_direct <- direct_total %>%
  arrange(desc(Total_Emissions)) %>%
  slice_head(n = 50)

# Calculate SCC values for direct emitters for three scenarios without formatting.
direct_total_scc <- top_direct %>%
  mutate(
    SCC_51 = Total_Emissions * 51,
    SCC_100 = Total_Emissions * 100,
    SCC_150 = Total_Emissions * 150
  )

# Compute a summary (total) row that aggregates all the values for the top 50 companies.
direct_total_summary <- direct_total_scc %>%
  summarise(
    `Parent Company` = "TOTAL",
    Total_Emissions = sum(Total_Emissions, na.rm = TRUE),
    SCC_51 = sum(SCC_51, na.rm = TRUE),
    SCC_100 = sum(SCC_100, na.rm = TRUE),
    SCC_150 = sum(SCC_150, na.rm = TRUE)
  )

# Append the summary row to the dataset.
direct_total_scc_combined <- bind_rows(direct_total_scc, direct_total_summary)

# Format the SCC columns as dollar amounts and Total Emissions with commas.
direct_total_scc_combined %>%
  mutate(
    Total_Emissions = scales::comma(Total_Emissions, accuracy = 1),
    SCC_51 = scales::dollar(SCC_51),
    SCC_100 = scales::dollar(SCC_100),
    SCC_150 = scales::dollar(SCC_150)
  ) %>%
  gt() %>%
  tab_header(
    title = "Social Cost of Carbon: Top 50 Direct Emitters (2016–2023)",
    subtitle = "Estimated Present Value of Future Damages"
  ) %>%
  cols_label(
    `Parent Company` = "Parent Company",
    Total_Emissions = "Total Emissions (tCO₂e)",
    SCC_51 = "SCC @ $51/tCO2",
    SCC_100 = "SCC @ $100/tCO2",
    SCC_150 = "SCC @ $150/tCO2"
  ) %>%
  cols_align(
    align = "center",
    columns = c(Total_Emissions, SCC_51, SCC_100, SCC_150)
  )
Social Cost of Carbon: Top 50 Direct Emitters (2016–2023)
Estimated Present Value of Future Damages
Parent Company Total Emissions (tCO₂e) SCC @ $51/tCO2 SCC @ $100/tCO2 SCC @ $150/tCO2
DUKE ENERGY CORP 489,677,218 $24,973,538,123 $48,967,721,809 $73,451,582,713
THE SOUTHERN CO 406,070,950 $20,709,618,429 $40,607,094,958 $60,910,642,438
VISTRA CORP 366,069,196 $18,669,529,015 $36,606,919,637 $54,910,379,455
BERKSHIRE HATHAWAY INC 351,670,571 $17,935,199,126 $35,167,057,109 $52,750,585,663
AMERICAN ELECTRIC POWER CO INC 294,210,257 $15,004,723,084 $29,421,025,655 $44,131,538,483
ENTERGY CORP 224,667,438 $11,458,039,348 $22,466,743,820 $33,700,115,730
VISTRA ENERGY CORP 217,034,035 $11,068,735,761 $21,703,403,453 $32,555,105,180
NEXTERA ENERGY INC 213,062,981 $10,866,212,049 $21,306,298,136 $31,959,447,204
EXXON MOBIL CORP 210,389,296 $10,729,854,093 $21,038,929,595 $31,558,394,392
DOMINION ENERGY INC 198,480,431 $10,122,501,956 $19,848,043,051 $29,772,064,576
NRG ENERGY INC 184,117,703 $9,390,002,851 $18,411,770,296 $27,617,655,444
DTE ENERGY CO 173,657,152 $8,856,514,755 $17,365,715,206 $26,048,572,810
PHILLIPS 66 167,659,786 $8,550,649,064 $16,765,978,557 $25,148,967,836
MARATHON PETROLEUM CORP 156,815,795 $7,997,605,530 $15,681,579,471 $23,522,369,206
AMEREN CORP 152,657,107 $7,785,512,456 $15,265,710,698 $22,898,566,047
XCEL ENERGY INC 147,906,907 $7,543,252,282 $14,790,690,750 $22,186,036,124
CHEVRON CORP 147,504,780 $7,522,743,776 $14,750,477,992 $22,125,716,988
KOCH INDUSTRIES INC 143,059,985 $7,296,059,216 $14,305,998,463 $21,458,997,695
PPL CORP 139,653,181 $7,122,312,244 $13,965,318,126 $20,947,977,189
VALERO ENERGY CORP 138,663,843 $7,071,855,980 $13,866,384,274 $20,799,576,411
VOLT PARENT LP 137,743,550 $7,024,921,073 $13,774,355,046 $20,661,532,569
BASIN ELECTRIC POWER COOPERATIVE 132,839,011 $6,774,789,551 $13,283,901,080 $19,925,851,620
EVERGY INC 127,274,294 $6,490,988,993 $12,727,429,398 $19,091,144,097
US STEEL CORP 119,877,189 $6,113,736,655 $11,987,718,931 $17,981,578,397
CF INDUSTRIES HOLDINGS INC 117,036,515 $5,968,862,241 $11,703,651,453 $17,555,477,179
LIGHTSTONE GENERATION LLC 114,336,069 $5,831,139,526 $11,433,606,914 $17,150,410,372
CLEVELANDCLIFFS INC 112,569,602 $5,741,049,678 $11,256,960,153 $16,885,440,229
AES CORP 112,007,335 $5,712,374,093 $11,200,733,516 $16,801,100,274
ASSOCIATED ELECTRIC COOPERATIVE INC 103,317,410 $5,269,187,908 $10,331,740,996 $15,497,611,494
BP AMERICA INC 102,704,583 $5,237,933,741 $10,270,458,315 $15,405,687,473
CMS ENERGY CORP 100,416,520 $5,121,242,499 $10,041,651,958 $15,062,477,937
SOUTHERN CO 99,501,018 $5,074,551,935 $9,950,101,832 $14,925,152,749
XCEL ENERGY 90,634,791 $4,622,374,363 $9,063,479,142 $13,595,218,713
FIRSTENERGY CORP 90,330,574 $4,606,859,264 $9,033,057,381 $13,549,586,072
TALEN ENERGY CORP 86,629,258 $4,418,092,155 $8,662,925,794 $12,994,388,691
ALLIANT ENERGY CORP 83,811,099 $4,274,366,045 $8,381,109,891 $12,571,664,837
DOW INC 83,362,296 $4,251,477,088 $8,336,229,584 $12,504,344,376
CPN MANAGEMENT LP 81,254,237 $4,143,966,108 $8,125,423,741 $12,188,135,611
AIR PRODUCTS CHEMICALS INC 76,684,794 $3,910,924,512 $7,668,479,434 $11,502,719,152
WASTE MANAGEMENT INC 76,249,543 $3,888,726,692 $7,624,954,298 $11,437,431,447
ENERGY TRANSFER LP 76,106,994 $3,881,456,693 $7,610,699,398 $11,416,049,098
WEC ENERGY GROUP INC 74,005,379 $3,774,274,326 $7,400,537,894 $11,100,806,841
KINDER MORGAN INC 73,224,500 $3,734,449,509 $7,322,450,017 $10,983,675,026
PRAIRIE STATE ENERGY CAMPUS MANAGEMENT CO 71,846,286 $3,664,160,570 $7,184,628,570 $10,776,942,854
CPS ENERGY 71,468,514 $3,644,894,222 $7,146,851,416 $10,720,277,123
ARCHER DANIELS MIDLAND CO 69,194,597 $3,528,924,423 $6,919,459,654 $10,379,189,481
AMERICAN ELECTRIC POWER 69,131,362 $3,525,699,466 $6,913,136,207 $10,369,704,311
OCCIDENTAL PETROLEUM CORP 67,146,363 $3,424,464,488 $6,714,636,251 $10,071,954,376
ENTERPRISE PRODUCTS PARTNERS LP 66,168,480 $3,374,592,486 $6,616,848,012 $9,925,272,018
CONOCOPHILLIPS 64,537,931 $3,291,434,465 $6,453,793,069 $9,680,689,604
TOTAL 7,274,438,704 $370,996,373,906 $727,443,870,403 $1,091,165,805,605

Supplier Facilities

# Aggregate supplier data across all years by summing emissions per parent company.
suppliers_total <- suppliers_summarized %>%
  group_by(`Parent Company`) %>%
  summarise(Total_Emissions = sum(`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)) %>%
  ungroup()

# Select the top 10 companies by total emissions.
top_suppliers <- suppliers_total %>%
  arrange(desc(Total_Emissions)) %>%
  slice_head(n = 10)

# Calculate SCC values for suppliers for the three scenarios.
suppliers_total_scc <- top_suppliers %>%
  mutate(
    SCC_51 = Total_Emissions * 51,
    SCC_100 = Total_Emissions * 100,
    SCC_150 = Total_Emissions * 150
  )

# Compute a summary (total) row for the supplier data.
suppliers_total_summary <-suppliers_total_scc %>%
  summarise(
    `Parent Company` = "TOTAL",
    Total_Emissions = sum(Total_Emissions, na.rm = TRUE),
    SCC_51 = sum(SCC_51, na.rm = TRUE),
    SCC_100 = sum(SCC_100, na.rm = TRUE),
    SCC_150 = sum(SCC_150, na.rm = TRUE)
  )

# Append the summary row to the supplier data.
suppliers_total_scc_combined <- bind_rows(suppliers_total_scc, suppliers_total_summary)

# Format the data and create a GT table for suppliers.
suppliers_total_scc_combined %>%
  mutate(
    Total_Emissions = scales::comma(Total_Emissions, accuracy = 1),
    SCC_51 = scales::dollar(SCC_51),
    SCC_100 = scales::dollar(SCC_100),
    SCC_150 = scales::dollar(SCC_150)
  ) %>%
  gt() %>%
  tab_header(
    title = "Social Cost of Carbon: Top 10 Suppliers (2016–2023)",
    subtitle = "Estimated Present Value of Future Damages"
  ) %>%
  cols_label(
    `Parent Company` = "Parent Company",
    Total_Emissions = "Total Emissions (tCO₂e)",
    SCC_51 = "SCC @ $51/tCO2",
    SCC_100 = "SCC @ $100/tCO2",
    SCC_150 = "SCC @ $150/tCO2"
  ) %>%
  cols_align(
    align = "center",
    columns = c(Total_Emissions, SCC_51, SCC_100, SCC_150)
  )
Social Cost of Carbon: Top 10 Suppliers (2016–2023)
Estimated Present Value of Future Damages
Parent Company Total Emissions (tCO₂e) SCC @ $51/tCO2 SCC @ $100/tCO2 SCC @ $150/tCO2
VALERO ENERGY CORP 2,965,219,498 $151,226,194,407 $296,521,949,818 $444,782,924,727
PHILLIPS 66 2,766,469,653 $141,089,952,294 $276,646,965,281 $414,970,447,922
MARATHON PETROLEUM CORP 2,185,421,825 $111,456,513,064 $218,542,182,478 $327,813,273,718
EXXON MOBIL CORP 1,533,786,295 $78,223,101,020 $153,378,629,450 $230,067,944,175
CHEVRON CORP 1,292,522,699 $65,918,657,637 $129,252,269,877 $193,878,404,816
ENTERPRISE PRODUCTS PARTNERS LP 1,206,022,346 $61,507,139,669 $120,602,234,645 $180,903,351,968
BP AMERICA INC 1,016,357,463 $51,834,230,600 $101,635,746,274 $152,453,619,411
MARATHON PETROLEUM CO LP 867,745,173 $44,255,003,822 $86,774,517,298 $130,161,775,947
EXXONMOBIL CORP 818,148,296 $41,725,563,076 $81,814,829,561 $122,722,244,341
KOCH INDUSTRIES INC 784,259,265 $39,997,222,533 $78,425,926,536 $117,638,889,804
TOTAL 15,435,952,512 $787,233,578,122 $1,543,595,251,219 $2,315,392,876,829

Corporate Emissions (State)

Direct Emitter Facilities

This section focuses on direct emitter facilities in a specific state (e.g., California) by filtering the parent company emissions data for the selected state. The code groups the data by parent company and year, calculates the total emissions and the number of facilities. The resulting tables display the top 10 parent companies by emissions for each year, providing insights into the corporate entities with the highest direct emissions in the chosen state.

# Set the target state (modify as needed)
state_choice <- "CA"

# Filter direct emitter data for the selected state, grouping by parent company and year.
direct_state <- epa_parent_company_emitters %>%
  filter(facility_state == state_choice) %>%
  group_by(`Parent Company`, Year) %>%
  summarise(
    Direct_Emissions = sum(`Emissions (tCO₂e)`, na.rm = TRUE),
    Direct_Facilities = n(),
    .groups = "drop"
  ) %>%
  # Remove entries with zero or NA emissions
  filter(Direct_Emissions != 0, !is.na(Direct_Emissions))

# Split the data by year so that we can create separate tables for each year.
direct_state_by_year <- direct_state %>% group_split(Year)

# Initialize an empty list to store the GT tables.
gt_tables_direct <- list()

# Loop through each year's data and generate a GT table for the top 10 direct emitter parent companies.
for (dt in direct_state_by_year) {
  # Get the current year (should be unique for each split)
  current_year <- unique(dt$Year)
  
  # Create the GT table for the current year's data.
  gt_table_direct <- dt %>%
    arrange(desc(Direct_Emissions)) %>%
    slice_head(n = 10) %>%
    select(-Year) %>% 
    gt() %>%
    tab_header(
      title = paste("EPA GHGP Direct Emitter Corporate Emissions in", state_choice, "for Year", current_year),
      subtitle = "Top 10 Parent Companies by Emissions (tCO₂e)"
    ) %>%
    cols_label(
      `Parent Company` = "Parent Company",
      Direct_Emissions = "Direct Emissions (tCO₂e)",
      Direct_Facilities = "Direct Facilities"
    ) %>%
    fmt_number(
      columns = vars(Direct_Emissions),
      decimals = 0,
      use_seps = TRUE
    ) %>%
    cols_align(
      align = "center",
      columns = everything()
    )
  
  # Store the table in the list using the year as the name.
  gt_tables_direct[[as.character(current_year)]] <- gt_table_direct
}

# Return the list of GT tables (R Markdown will render each table in the list).
gt_tables_direct
$2016
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2016
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 3,606,869 12
CALIFORNIA RESOURCES CORP 2,226,226 6
SEMPRA ENERGY INC 1,447,609 3
SENTINEL PEAK RESOURCES 871,033 1
PACIFIC GAS ELECTRIC CO 385,866 3
LINN ENERGY 334,313 3
GALESI GROUP 327,052 1
NATIONAL FUEL 286,391 1
MACPHERSON OIL CO 186,812 1
ALL AMERICAN OIL GAS INC 172,127 1
$2017
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2017
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 3,370,275 10
CALIFORNIA RESOURCES CORP 2,031,496 6
BERRY PETROLEUM CO 1,504,863 4
SEMPRA ENERGY INC 1,457,634 3
SENTINEL PEAK RESOURCES 905,212 2
GALESI GROUP 443,244 1
PACIFIC GAS ELECTRIC CO 411,741 3
NATIONAL FUEL GAS CO 321,413 1
ALL AMERICAN OIL GAS INC 210,291 1
MACPHERSON OIL CO 196,416 1
$2018
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2018
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 13,421,763 15
SHELL OIL CO 7,185,715 18
MARATHON PETROLEUM CORP 6,454,705 1
CALPINE CORP 6,334,034 14
PHILLIPS 66 4,486,697 4
VALERO ENERGY CORP 3,982,078 3
EXXONMOBIL CORP 3,878,314 17
CALIFORNIA RESOURCES CORP 3,252,823 8
PACIFIC GAS ELECTRIC CO 3,152,916 13
PBF ENERGY CO LLC 3,045,596 1
$2019
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2019
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 12,841,261 14
MARATHON PETROLEUM CORP 8,251,132 2
VOLT PARENT LP 6,287,773 14
PHILLIPS 66 4,195,435 4
VALERO ENERGY CORP 3,747,529 3
EXXON MOBIL CORP 3,747,119 17
SHELL PETROLEUM INC 3,747,119 17
PACIFIC GAS ELECTRIC CO 3,269,526 13
MARTINEZ REFINING CO LLC 3,055,157 1
CALIFORNIA RESOURCES CORP 3,026,660 8
$2020
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2020
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 11,621,789 12
VOLT PARENT LP 7,391,354 13
MARATHON PETROLEUM CORP 6,423,235 2
PHILLIPS 66 3,922,749 4
VALERO ENERGY CORP 3,792,727 3
EXXON MOBIL CORP 3,420,414 18
SHELL PETROLEUM INC 3,420,414 18
PGE CORP 3,325,554 13
MARTINEZ REFINING CO LLC 3,228,240 1
CALIFORNIA RESOURCES CORP 3,114,866 8
$2021
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2021
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 12,039,171 11
VOLT PARENT LP 7,885,685 14
MARATHON PETROLEUM CORP 6,314,556 3
PBF ENERGY INC 5,351,493 2
PHILLIPS 66 3,942,588 4
VALERO ENERGY CORP 3,612,266 3
AES CORP 3,206,345 3
PGE CORP 3,161,889 13
AIR PRODUCTS CHEMICALS INC 2,949,674 6
CALIFORNIA RESOURCES CORP 2,920,756 8
$2022
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2022
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 11,027,640 11
VOLT PARENT LP 8,043,168 14
MARATHON PETROLEUM CORP 6,511,319 2
PBF ENERGY INC 4,150,845 2
AIR PRODUCTS CHEMICALS INC 4,064,969 7
PHILLIPS 66 4,038,222 4
VALERO ENERGY CORP 3,887,653 3
AES CORP 3,502,436 3
CITY OF LOS ANGELES CALIFORNIA 2,953,048 8
TAIHEIYO CEMENT USA INC 2,893,836 3
$2023
EPA GHGP Direct Emitter Corporate Emissions in CA for Year 2023
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Direct Emissions (tCO₂e) Direct Facilities
CHEVRON CORP 11,099,322 12
CPN MANAGEMENT LP 8,275,240 14
MARATHON PETROLEUM CORP 6,695,228 2
VALERO ENERGY CORP 4,119,790 3
AIR PRODUCTS CHEMICALS INC 3,822,461 8
PHILLIPS 66 3,821,114 4
PBF ENERGY INC 3,560,930 2
AES CORP 3,063,608 3
TAIHEIYO CEMENT USA INC 2,780,971 3
CPPIB VEDDER US HOLDINGS LLC 2,657,790 18

Supplier Facilities

# Set the target state (modify as needed)
state_choice <- "CA"

# Filter supplier data for the selected state, grouping by parent company and year.
supplier_state <- epa_parent_company_suppliers %>%
  filter(facility_state == state_choice) %>%
  group_by(`Parent Company`, Year) %>%
  summarise(
    Supplier_Emissions = sum(`Emissions (tCO₂e)`, na.rm = TRUE),
    Supplier_Facilities = n(),
    .groups = "drop"
  ) %>%
  # Remove entries with zero or NA emissions.
  filter(Supplier_Emissions != 0, !is.na(Supplier_Emissions))

# Split the supplier data by year.
supplier_state_by_year <- supplier_state %>% group_split(Year)

# Initialize an empty list to store GT tables for each year.
gt_tables_supplier <- list()

# Loop through each year's data and generate a GT table for the top 10 supplier parent companies.
for (dt in supplier_state_by_year) {
  current_year <- unique(dt$Year)
  
  # Create the GT table for the current year.
  gt_table_supplier <- dt %>%
    arrange(desc(Supplier_Emissions)) %>%
    slice_head(n = 10) %>%
    select(-Year) %>%
    gt() %>%
    tab_header(
      title = paste("EPA GHGP Supplier Corporate Emissions in", state_choice, "for Year", current_year),
      subtitle = "Top 10 Parent Companies by Emissions (tCO₂e)"
    ) %>%
    cols_label(
      `Parent Company` = "Parent Company",
      Supplier_Emissions = "Supplier Emissions (tCO₂e)",
      Supplier_Facilities = "Supplier Facilities"
    ) %>%
    fmt_number(
      columns = vars(Supplier_Emissions),
      decimals = 0,
      use_seps = TRUE
    ) %>%
    cols_align(
      align = "center",
      columns = everything()
    )
  
  # Store the table in the list using the year as the name.
  gt_tables_supplier[[as.character(current_year)]] <- gt_table_supplier
}

# Return the list of GT tables.
gt_tables_supplier
$2016
EPA GHGP Supplier Corporate Emissions in CA for Year 2016
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 100,418,430 4
TESORO CORP 73,736,969 2
SEMPRA ENERGY INC 48,306,059 2
PHILLIPS 66 44,661,128 3
PACIFIC GAS ELECTRIC CO 38,692,964 1
VALERO ENERGY CORP 28,696,558 3
SHELL OIL CO 17,742,849 2
PBF ENERGY CO LLC 17,009,352 1
KERN OIL REFINING CO 3,945,722 1
SAN JOAQUIN REFINING CO INC 2,294,866 1
$2017
EPA GHGP Supplier Corporate Emissions in CA for Year 2017
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 113,304,913 4
TESORO CORP 73,058,874 2
PHILLIPS 66 50,597,859 3
SEMPRA ENERGY INC 47,833,913 2
PACIFIC GAS ELECTRIC CO 38,197,517 1
VALERO ENERGY CORP 26,921,013 3
SHELL OIL CO 19,475,276 1
PBF ENERGY CO LLC 17,520,577 1
KERN OIL REFINING CO 3,974,225 1
SAN JOAQUIN REFINING CO INC 2,426,901 1
$2018
EPA GHGP Supplier Corporate Emissions in CA for Year 2018
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 110,569,818 4
TESORO CORP 68,000,981 2
PHILLIPS 66 51,187,870 3
SEMPRA ENERGY INC 44,652,736 2
PACIFIC GAS ELECTRIC CO 41,657,970 1
VALERO ENERGY CORP 31,361,500 3
PBF ENERGY CO LLC 23,414,035 1
SHELL OIL CO 19,259,493 1
KERN OIL REFINING CO 4,067,665 1
SAN JOAQUIN REFINING CO INC 2,351,613 1
$2019
EPA GHGP Supplier Corporate Emissions in CA for Year 2019
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 113,336,595 3
MARATHON PETROLEUM CORP 71,090,672 2
PHILLIPS 66 50,128,883 3
PACIFIC GAS ELECTRIC CO 42,054,663 1
SEMPRA ENERGY 41,168,271 1
VALERO ENERGY CORP 28,357,644 3
PBF ENERGY INC 21,523,032 1
MARTINEZ REFINING CO LLC 16,204,414 1
SEMPRA ENERGY INC 4,689,232 1
CASEY CO 4,139,021 1
$2020
EPA GHGP Supplier Corporate Emissions in CA for Year 2020
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 90,286,545 3
MARATHON PETROLEUM CORP 48,768,550 2
PHILLIPS 66 43,645,240 3
SEMPRA ENERGY 40,365,538 1
PGE CORP 40,301,528 1
VALERO ENERGY CORP 28,527,687 3
PBF ENERGY INC 17,623,771 1
MARTINEZ REFINING CO LLC 16,466,985 1
SEMPRA ENERGY INC 5,339,664 1
CASEY CO 3,678,940 1
$2021
EPA GHGP Supplier Corporate Emissions in CA for Year 2021
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 103,042,267 3
MARATHON PETROLEUM CORP 49,453,761 2
SEMPRA ENERGY 41,682,968 1
PGE CORP 41,556,628 1
PBF ENERGY INC 37,343,321 2
PHILLIPS 66 33,055,155 3
VALERO ENERGY CORP 27,955,809 3
SEMPRA ENERGY INC 5,295,761 1
CASEY CO 3,575,885 1
SAN JOAQUIN REFINING CO INC 2,233,481 1
$2022
EPA GHGP Supplier Corporate Emissions in CA for Year 2022
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 94,258,240 3
MARATHON PETROLEUM CORP 51,167,138 1
SEMPRA ENERGY 41,485,081 1
PGE CORP 40,917,369 1
PBF ENERGY INC 40,596,252 2
PHILLIPS 66 35,659,498 3
VALERO ENERGY CORP 27,301,506 3
SEMPRA ENERGY INC 5,573,852 1
CASEY CO 3,831,427 1
SAN JOAQUIN REFINING CO INC 2,357,338 1
$2023
EPA GHGP Supplier Corporate Emissions in CA for Year 2023
Top 10 Parent Companies by Emissions (tCO₂e)
Parent Company Supplier Emissions (tCO₂e) Supplier Facilities
CHEVRON CORP 103,804,057 3
MARATHON PETROLEUM CORP 48,306,980 1
PGE CORP 42,218,961 1
SEMPRA ENERGY 39,905,356 1
PBF ENERGY INC 36,253,854 2
PHILLIPS 66 33,366,846 3
VALERO ENERGY CORP 30,020,682 3
SEMPRA ENERGY INC 5,140,426 1
CASEY CO 3,769,504 1
SAN JOAQUIN REFINING CO INC 2,187,973 1

Merging FRS and EPA Data for Census Analysis

Downloaded the FRS National Single CSV File from the EPA’s Facility Registry Service on. This file contains information about facilities regulated under multiple EPA environmental programs. Given its size, we loaded and trimmed the dataset to retain only relevant columns (e.g., EPA Registry ID, program acronym, location data, facility name, sector). This script loads the EPA FRS data, cleans it by converting key identifiers to the correct type, and joins it with the EPA parent company emissions data. Next, it aggregates the emissions at the census block level. Later, you can merge this aggregated emissions dataset with Census data (for example, median household income or demographic data) using shared geographic identifiers (like census tract or block group).

# ------------------------------------------------------------------------------
# Facility Registry Service (FRS) National Dataset
# Source: https://www.epa.gov/frs/epa-frs-facilities-state-single-file-csv-download
# Downloaded full U.S. file (FRS_NATIONAL_SINGLE.CSV) 
# ------------------------------------------------------------------------------
# File size: ~2 GB — cleaned and trimmed to reduce memory load
# ------------------------------------------------------------------------------

# Load dataset — change this to your local filepath
#frs <- read_csv("DATA DOWNLOAD LOCATION HERE")

# Define keywords corresponding to the systems you need
#keywords <- c("E-GGRT", "EGRID", "CAMDBS", "AFS", "AIR", "TRI", "ICIS-AIR", "EMISSIONS", "GREENHOUSE GAS")
#pattern <- paste(keywords, collapse = "|") 


#frs_cleaned <- frs %>%
#  filter(grepl(pattern, INTEREST_TYPES, ignore.case = TRUE)) %>% 
#  select(REGISTRY_ID,
#         FRS_FACILITY_DETAIL_REPORT_URL,
#         LATITUDE83,
#         LONGITUDE83,
#         LOCATION_ADDRESS, 
#         CENSUS_BLOCK_CODE,
#         CITY_NAME,
#         FIPS_CODE,
#         STATE_CODE, 
#         POSTAL_CODE, 
#         COUNTY_NAME,
#         SIC_CODES,
#         NAICS_CODES,
#         HUC_CODE,
#         TRIBAL_LAND_CODE,
#         PGM_SYS_ACRNMS,
#         CONGRESSIONAL_DIST_NUM) %>% 
#  clean_names() 

#write.csv(frs_cleaned, 
          #here("Data", "Raw", "EPA_Facility_Registry_Service_Data", "FRS_NATIONAL.csv"), 
          #row.names = FALSE)
# Load the EPA FRS Query data
FRS_ID <- read_csv(here("Data", "Raw", "EPA_Facility_Registry_Service_Data", "FRS_NATIONAL.csv"))

# Convert the REGISTRY_ID column to character
FRS_ID <- FRS_ID %>% 
  mutate(registry_id = as.character(registry_id))

frs_epa_parent_company_emitters <- epa_parent_company_emitters %>%
  left_join(FRS_ID, by = c("frs_id_facility" = "registry_id")) %>%
  filter(!is.na(frs_id_facility)) %>%
  select(-all_of("frs_id_facility")) %>% 
  clean_names() %>% 
  select(year, facility, name, emissions_t_co2e, census_block_code, facility_city, facility_zip, facility_state,
  facility_county, parent_company, parent_company_emissions_t_co2e, longitude=longitude83, latitude=latitude83,
  frs_facility_detail_report_url)

# Calculate total emissions per census block
emissions_by_census_block <- frs_epa_parent_company_emitters %>%
  group_by(census_block_code, facility_county, facility_state, year) %>%
  summarise(
    total_emissions_t_co2e = sum(emissions_t_co2e, na.rm = TRUE),
    facility_count = n()
  ) 

write.csv(emissions_by_census_block, 
          here::here("Data", "Processed", "EPA_GHGP_Emissions_by_Census_Block.csv"),
          row.names = FALSE)